library(reshape2)
library(lubridate)
library(ggplot2)
theme_set(theme_bw(base_size=12))
library(cowplot)


################################################################################
# Overview Figure (Fig 1)

load("../Data/JournalAggregates.RData")

ag <- aggregate(cbind(TotM, TotF) ~ Date, jnl.sub, sum)
ag <- melt(ag, "Date")
levels(ag$variable) <- c("Men", "Women")

ag$feb.may <- NA
ag$feb.may[ag$Date >="2020-02-01" & ag$Date <= "2020-05-30"] <- "2020-03-31"
ag$feb.may[ag$Date >="2019-02-01" & ag$Date <= "2019-05-30"] <- "2019-03-31"
ag$feb.may[ag$Date >="2018-02-01" & ag$Date <= "2018-05-30"] <- "2018-03-31"
ag$feb.may <- as.POSIXct(ag$feb.may)
table(ag$feb.may, useNA="ifany")

notes <- data.frame(x="2019-03-31", y=0, propM=0, propF=0)
i <- 0
for (ddt in  as.character(unique(ag$feb.may)[-1])){
  sel <- subset(ag, feb.may==ddt)
  i <- i + 1
  notes[i,] <- data.frame(ddt, 195000, round(sum(sel$value[sel$variable=="Men"]), 2), round(sum(sel$value[sel$variable=="Women"]), 2))
}

notes$x <- as.POSIXct(notes$x)
notes$label <- paste0("Feb-May ", lubridate::year(notes$x), "\nWomen: ", round(notes$propF), "\nMen: ",  round(notes$propM))
notes

ymax <- 210000
subs.plt <- ggplot(subset(ag, Date < "2020-06-01" & Date >= "2018-01-01"), aes(x=Date, y=value, color=variable))  + scale_color_viridis_d() + labs(y="Total submissions", color="Gender:") + theme(legend.position="top") + xlim(as.POSIXct("2018-01-01"),as.POSIXct("2020-05-30"))  + geom_polygon(data=data.frame(x=as.POSIXct(c("2020-02-01","2020-02-01","2020-05-30", "2020-05-25")), y=c(0,ymax,ymax,0)), mapping=aes(x=x,y=y), fill="grey", alpha=0.25, inherit.aes=F) + geom_polygon(data=data.frame(x=as.POSIXct(c("2019-02-01","2019-02-01","2019-05-30", "2019-05-25")), y=c(0,ymax,ymax,0)), mapping=aes(x=x,y=y), fill="grey", alpha=0.25, inherit.aes=F) + geom_polygon(data=data.frame(x=as.POSIXct(c("2018-02-01","2018-02-01","2018-05-30", "2018-05-25")), y=c(0,ymax,ymax,0)), mapping=aes(x=x,y=y), fill="grey", alpha=0.2, inherit.aes=F) + ylim(0,ymax)

subs.plt <- subs.plt + geom_line(show.legend=F, size=1) +geom_label(data=notes, mapping=aes(x=x, y=y, label=label), inherit.aes=F, size=2)
subs.plt

# Reviews
ag2 <- aggregate(cbind(propAcceptedM, propAcceptedF) ~ Date, jnl.rev, mean, na.rm=T)
ag2 <- melt(ag2, "Date")
levels(ag2$variable) <- c("Men", "Women")

ag2.feb.may <- aggregate(cbind(acceptedM, declinedM, acceptedF, declinedF) ~ Date, subset(jnl.rev, (Date >="2020-02-01" & Date <= "2020-05-30") | (Date >="2019-02-01" & Date <= "2019-05-30") | (Date >="2018-02-01" & Date <= "2018-05-30") ), sum)
ag2.feb.may$year <- lubridate::year(ag2.feb.may$Date)

prop <- function(x) sum(x[,1])/sum(x)

prop(ag2.feb.may[ag2.feb.may$year==2018, c("acceptedM","declinedM")])
prop(ag2.feb.may[ag2.feb.may$year==2019, c("acceptedM","declinedM")])
prop(ag2.feb.may[ag2.feb.may$year==2020, c("acceptedM","declinedM")])

notes <- data.frame(
  x = as.POSIXct(c("2018-03-31", "2019-03-31", "2020-03-31")),
  y = rep(0.93, 3),
  propM = c(
    prop(ag2.feb.may[ag2.feb.may$year==2018, c("acceptedM","declinedM")]),
    prop(ag2.feb.may[ag2.feb.may$year==2019, c("acceptedM","declinedM")]),
    prop(ag2.feb.may[ag2.feb.may$year==2020, c("acceptedM","declinedM")])
  ),
  propF = c(
    prop(ag2.feb.may[ag2.feb.may$year==2018, c("acceptedF","declinedF")]),
    prop(ag2.feb.may[ag2.feb.may$year==2019, c("acceptedF","declinedF")]),
    prop(ag2.feb.may[ag2.feb.may$year==2020, c("acceptedF","declinedF")])
  )  
)
notes$propM <- round(notes$propM, 2)
notes$propF <- round(notes$propF, 2)
notes$label <- paste0("Feb-May ", lubridate::year(notes$x), "\nWomen: ", format(notes$propF, nsmall=2), "\nMen: ",  format(notes$propM, nsmall=2))
notes

revs.plt <- ggplot(subset(ag2, Date < "2020-06-01" & Date >= "2018-01-01"), aes(x=Date, y=value, color=variable, fill=variable)) + scale_color_viridis_d() + labs(y="Proportion of accepted reviews", fill="Gender:") + ylim(0,1) + geom_polygon(data=data.frame(x=as.POSIXct(c("2020-02-01","2020-02-01","2020-05-30", "2020-05-25")), y=c(0,1,1,0)), mapping=aes(x=x,y=y), fill="grey", alpha=0.25, inherit.aes=F) + geom_polygon(data=data.frame(x=as.POSIXct(c("2019-02-01","2019-02-01","2019-05-30", "2019-05-25")), y=c(0,1,1,0)), mapping=aes(x=x,y=y), fill="grey", alpha=0.25, inherit.aes=F) + geom_polygon(data=data.frame(x=as.POSIXct(c("2018-02-01","2018-02-01","2018-05-30", "2018-05-25")), y=c(0,1,1,0)), mapping=aes(x=x,y=y), fill="grey", alpha=0.25, inherit.aes=F)

revs.plt <- revs.plt  + geom_line(size=1, show.legend=F) + geom_label(data=notes, mapping=aes(x=x, y=y, label=label), inherit.aes=F, size=2)
revs.plt

legend <- get_legend(ggplot(subset(ag, Date < "2020-06-01" & Date >= "2018-01-01"), aes(x=Date, y=value, color=variable, fill=variable)) + geom_area(color="grey") + scale_fill_viridis_d() + labs(y="Total Accepted reviews", fill=""))

plot_grid(subs.plt, revs.plt, legend, labels=c("A","B", NULL), rel_widths=c(4.58,4.42,1), ncol=3)


################################################################################
# MeanAut Figure (Fig 2)

load("../Data/Authors.RData")

se <- function(x) sd(x)/sqrt(length(x))

breaks <- c(0, 20, 60)
HMS$TimeSinceFirstPubClass <- cut(HMS$TimeSinceFirstPub, breaks=breaks, include.lowest=T)
LS$TimeSinceFirstPubClass <- cut(LS$TimeSinceFirstPub, breaks=breaks, include.lowest=T)
PS$TimeSinceFirstPubClass <- cut(PS$TimeSinceFirstPub, breaks=breaks, include.lowest=T)
SSE$TimeSinceFirstPubClass <- cut(SSE$TimeSinceFirstPub, breaks=breaks, include.lowest=T)

ag.hms <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, HMS, mean)
ag.hms$se <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, HMS, se)[,3]
ag.hms$area <- "Health & Medicine"
ag.ls <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, LS, mean)
ag.ls$se <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, LS, se)[,3]
ag.ls$area <- "Life Sciences"
ag.ps <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, PS, mean)
ag.ps$se <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, PS, se)[,3]
ag.ps$area <- "Physical Sciences\n& Engineering"
ag.sse <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, SSE, mean)
ag.sse$se <- aggregate(delta.sub ~ Women1 + TimeSinceFirstPubClass, SSE, se)[,3]
ag.sse$area <- "Social Sciences\n& Economics"
ag <- rbind(ag.hms, ag.ls, ag.ps, ag.sse)
#levels(ag$Seniority) <- c("Not doctor", "Doctor", "Professor")
#ag$Women1 <- relevel(ag$Women1, ref="Women")

ggplot(ag, aes(x=Women1, y=delta.sub, ymin=delta.sub-se, ymax=delta.sub+se, fill=Women1, group=Women1)) + geom_col(color="darkgrey", show.legend=F) + geom_errorbar(width=0.5,color="darkgrey", show.legend=F) + labs(x="Gender", y="Average 2019-2020 change in submissions ", fill="") + scale_fill_viridis_d() + facet_grid(TimeSinceFirstPubClass ~ area, space="fixed", shrink=F) + theme(legend.position="bottom")


################################################################################
# MeanRefs Figure (Fig 3)

load("../Data/Referees.RData")

se <- function(x) sd(x)/sqrt(length(x))

breaks <- c(0, 20, 60)
HMS$TimeSinceFirstPubClass <- cut(HMS$TimeSinceFirstPub, breaks=breaks, include.lowest=T)
LS$TimeSinceFirstPubClass <- cut(LS$TimeSinceFirstPub, breaks=breaks, include.lowest=T)
PS$TimeSinceFirstPubClass <- cut(PS$TimeSinceFirstPub, breaks=breaks, include.lowest=T)
SSE$TimeSinceFirstPubClass <- cut(SSE$TimeSinceFirstPub, breaks=breaks, include.lowest=T)

ag.hms <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, HMS, mean)
ag.hms$se <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, HMS, se)[,3]
ag.hms$area <- "Health & Medicine"
ag.ls <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, LS, mean)
ag.ls$se <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, LS, se)[,3]
ag.ls$area <- "Life Sciences"
ag.ps <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, PS, mean)
ag.ps$se <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, PS, se)[,3]
ag.ps$area <- "Physical Sciences\n& Engineering"
ag.sse <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, SSE, mean)
ag.sse$se <- aggregate(deltaProp19.20 ~ Women1 + TimeSinceFirstPubClass, SSE, se)[,3]
ag.sse$area <- "Social Sciences\n& Economics"
ag <- rbind(ag.hms, ag.ls, ag.ps, ag.sse)
# levels(ag$Seniority) <- c("Not doctor", "Doctor", "Professor")
# ag$Women1 <- relevel(ag$Women1, ref="Women")

ggplot(ag, aes(x=Women1, y=deltaProp19.20, ymin=deltaProp19.20-se, ymax=deltaProp19.20+se, fill=Women1, group=Women1)) + geom_col(color="darkgrey", show.legend=F) + geom_errorbar(width=0.5,color="darkgrey",show.legend=F) + labs(x="Gender", y="Avg. 2019-2020 difference in the\nproportion of accepted invitations", fill="") + scale_fill_viridis_d() + facet_grid(TimeSinceFirstPubClass ~ area, space="fixed", shrink=F) + theme(legend.position="top") #+ scale_y_reverse()

